# -*- coding: utf-8 -*-
"""
Created on Sun Aug 18 09:38:32 2019

@author: rklotz
"""

import pandas as pd
import numpy as np

def add_vars_clean(df_track,df_ves_char,ports_df,mid2cntry_df) :
    # df_track is just a data frame with IMO numbers
        
    # now merge in full vessel characteristics
    df_track = df_track.merge(df_ves_char,on="IMO",how="left",indicator="merged_chars")
    df_track['merged_chars'] = np.where(df_track.merged_chars=='both',1,0)
    
    # getting flags for whether vessel had matched imo or not
    df_track['has_imo'] = np.where(df_track.IMO.isnull(),0,1)
    df_track['has_imo_orig'] = np.where(df_track.imo_orig.isnull(),0,1)
    df_track['has_imo_matched'] = np.where(df_track.imo_m.isnull(),0,1)
    
    # use ; instead of , in port_id lists
    df_track['PORT_ID'] = df_track['PORT_ID'].str.replace(',',';')
    
    # now merging in port names
    df_track = pd.merge(df_track,ports_df,left_on = 'PORT1' ,right_on='port_id' , how = 'left' )
    df_track = pd.merge(df_track,ports_df,left_on = 'PORT2' ,right_on='port_id' , how = 'left' , suffixes = ('1','2') )
    df_track = df_track.drop(['port_id1', 'port_id2'], axis=1)
    
    # getting direction (generally north or west)
    df_track['dirNW'] = df_track.portcode_SN1 < df_track.portcode_SN2
    df_track=df_track.applymap(lambda x: 1 if x == True else x)
    df_track=df_track.applymap(lambda x: 0 if x == False else x)
    
    # creating route variables
    df_track['minportSN'] = df_track[['portcode_SN1','portcode_SN2']].min(axis=1)
    df_track['maxportSN'] = df_track[['portcode_SN1','portcode_SN2']].max(axis=1)
    
    # getting names of min/max port
    ports_df_minmax = ports_df[['port_name','portcode_SN']].drop_duplicates()
    ports_df_minmax = ports_df_minmax.dropna(subset = ['portcode_SN'] )
    print(df_track.shape)
    df_track = pd.merge(df_track,ports_df_minmax,left_on = 'minportSN' ,right_on='portcode_SN' , how = 'left' )
    print( df_track.shape )
    df_track = pd.merge(df_track,ports_df_minmax,left_on = 'maxportSN' ,right_on='portcode_SN' , how = 'left' , suffixes = ('min','max') )
    print( df_track.shape )
    df_track = df_track.drop(['portcode_SNmax', 'portcode_SNmin'], axis=1)
    print( df_track.shape )
    # now creating route name
    df_track['route_name'] = df_track['port_namemin'] + '_' + df_track['port_namemax']
    print( df_track.shape )
    
    # classifying vessels according to AIS
    df_track['vessel_str'] = "Other"
    df_track['vessel_str'] = np.where( df_track.VesselType.isin( list(range(70,80,1)) + [1003 , 1004 ] ), "Cargo"  , df_track['vessel_str'] )
    df_track['vessel_str'] = np.where( df_track.VesselType.isin( [30,1001,1002 ]                 ), "Fishing"  , df_track['vessel_str'] )
    df_track['vessel_str'] = np.where( df_track.VesselType.isin( [35,1021]                       ), "Military"  , df_track['vessel_str'] )
    df_track['vessel_str'] = np.where( df_track.VesselType.isin( [ 0 ]                           ), "NA"  , df_track['vessel_str'] )
    df_track['vessel_str'] = np.where( df_track.VesselType.isin( list(range(60,70,1)) + [1012 , 1015 ] ), "Passenger"  , df_track['vessel_str'] )
    df_track['vessel_str'] = np.where( df_track.VesselType.isin( [ 36, 37, 1019 ]                ), "Pleasure"  , df_track['vessel_str'] )
    df_track['vessel_str'] = np.where( df_track.VesselType.isin( list(range(80,90,1)) + [ 1017,1024 ]  ), "Tanker"  , df_track['vessel_str'] )
    df_track['vessel_str'] = np.where( df_track.VesselType.isin( [21,22,31,32,52,1023,1025 ]     ), "Tug"  , df_track['vessel_str'] )
    
    ## recovering flags from MID
    df_track['mid'] = df_track.MMSI.astype(str).str[:3].astype(int)
    df_track['mid'] = np.where( df_track.MMSI.astype(str).str.len() == 9 , df_track['mid'] , np.nan ) # only get flag in 9 digits
    #
    ## merge country codes
    df_track = df_track.merge(mid2cntry_df, on='mid' , how='left')
    df_track['flag_cntry'] = np.where( df_track['flag_cntry'].isnull() ,  'ZZ' , df_track['flag_cntry'] )
    df_track['flag_country'] = np.where( df_track['flag_country'].isnull() ,  'OTHER' , df_track['flag_country'] )
    df_track = df_track.rename( columns={ 'flag_country':'flag_country_mid','flag_cntry':'flag_mid' } )
        
    # convert times from objects to times
    df_track['TIME1'] = pd.to_datetime(df_track.TIME1)
    df_track['TIME2'] = pd.to_datetime(df_track.TIME2)
    print( df_track.shape )
        
    return df_track


# helper functions ############################################################
def save_pnts(df_pnts_all,df_track,track_vars_to_merge,out_dta_points,out_csv_points ) :

    # merge in track variables to points
    df_pnts_all = df_pnts_all.merge(df_track[['OID','AISyear']+track_vars_to_merge],on=['OID','AISyear'],how="left")
    df_pnts_all = df_pnts_all.loc[df_pnts_all.drop_ind==False] # dropping unused interpolated tracks
    df_pnts_all.drop(columns=['drop_ind' ] , inplace=True )  
    df_pnts_all.columns = df_pnts_all.columns.str.replace(' ', '') # removing whitespace in names
    df_pnts_all.columns = df_pnts_all.columns.str.lower() # lowercase everything
    
    # save to stata
    #df_pnts_all = df_pnts_all.rename(columns={'Power Type':'powertype','Main Engine Fuel Type':'mainenginefueltype'})
    df_pnts_all['time1'] =  pd.to_datetime(df_pnts_all['time1'])
    df_pnts_all['time2'] =  pd.to_datetime(df_pnts_all['time2'])
    df_pnts_all['date_ds_apeep'] =  pd.to_datetime(df_pnts_all['date_ds_apeep']).dt.tz_convert(None)
    
    grouped = df_pnts_all.groupby('aisyear')
    for name, group in grouped:
        group.to_stata(out_dta_points % name ,convert_dates={'time1':'td' , 'time2':'td' , 'date_ds_apeep' :'td' },write_index=False,version=117)
        print("Saved File: {}".format(out_dta_points % name ))
    
        # save to csv
        group.to_csv(out_csv_points % name ,sep=',',index=False,header=True)
        print("Saved File: {}".format(out_csv_points % name ))


def get_nobound( entexit_nobound_csv , df_oid_mmsi_date, df_ves_char_fuel , sfoc_aux, EF_VOC ) :
    # adding OID so it can be merged to final tracks later on
    df_nobound = pd.read_csv( entexit_nobound_csv , index_col=False ,  low_memory=False) 
    df_nobound = df_nobound.drop(columns='OID')
    # this gets keeps only matches in both 100nm boundary data and no boundary data 
    df_nobound = df_nobound.merge(df_oid_mmsi_date,how='inner',left_on=['AIS_YEAR','MMSI','TIME'],right_on=['AISyear','MMSI','TIME'])
    
    a= df_nobound [df_nobound.duplicated(subset=['TIME','MMSI'] , keep=False) ]
    if len(a)>0 :
        check_duplicates # cheap error
        
    # to long format
    stubs = ['km','s3','hrs','s3_PM_inm','s3_SO2_inm', 's3_NOx_inm', 's3_VOC_inm','s3_NH3_inm','s3_PM','s3_SO2',
             'hrs_PM_inm','hrs_SO2_inm', 'hrs_NOx_inm', 'hrs_VOC_inm','hrs_NH3_inm','hrs_PM','hrs_SO2'  ]
    df_nobound_long = pd.wide_to_long(df_nobound, stubnames=stubs,sep='_in_' , suffix='\w+' , i=['OID','AIS_YEAR'], j='bound')
    
    # add chars for fuel calculations
    #df_nobound_long = df_nobound_long.merge(df_ves_char_fuel,on="IMO",how='left') 
    df_nobound_long = df_nobound_long.reset_index().merge(df_ves_char_fuel,on="IMO",how='left')
    
    
    # doing fuel and emissions calculations
    # replicates what is below, but sums over s3 and marginal damages are already done
    # ONLY DOING A SUBSET of calculations here (fuel and NOx+VOC damages, using only alpha_cons)
    
    # fuel (multiply s3 and hour sums by fuel consumption factors)
    df_nobound_long['Fmain_cons'] = df_nobound_long.s3 * df_nobound_long.alpha_cons 
    df_nobound_long['Faux']  = sfoc_aux * df_nobound_long['aux_sea'] * df_nobound_long['hrs'] 
                                # t per kwh * kw * hr --- tons fuel
    df_nobound_long['F_cons'] = df_nobound_long.Fmain_cons + df_nobound_long.Faux 
    
    # damages (multiply s3*MD and hrs*MD sums by consumption and emissions factors)
    # variables are sum s3*MD and hrs*MD 
    # need to do alpha*ef*s3_MD + sfoc * aux_sea*ef*hrs_MD
    df_nobound_long['TD_NOx_cons'] = ( df_nobound_long.alpha_cons * df_nobound_long.EF * df_nobound_long.s3_NOx_inm )\
                                        + ( sfoc_aux * df_nobound_long.aux_sea * df_nobound_long.EFaux * df_nobound_long.hrs_NOx_inm )
    df_nobound_long['TD_VOC_cons'] = EF_VOC * ( df_nobound_long.alpha_cons * df_nobound_long.s3_VOC_inm + sfoc_aux * df_nobound_long.aux_sea * df_nobound_long.hrs_VOC_inm )
    
    
    df_nobound_final = pd.pivot_table(df_nobound_long, columns=['bound'] , values=['TD_NOx_cons','TD_VOC_cons','F_cons'], index=['OID', 'AISyear'] )
    df_nobound_final.columns = ['_'.join(col) for col in df_nobound_final.columns] 
    df_nobound_final = df_nobound_final.reset_index()
    
    return df_nobound_final 


def get_imo_by_track (tracks_csv,mmsi_imo_dta,years) :

    # load mmsi imo map
    df_mmsi_imo = pd.read_stata( mmsi_imo_dta )
    df_mmsi_imo['m'] = df_mmsi_imo['my'].dt.month
    df_mmsi_imo['y'] = df_mmsi_imo['my'].dt.year
    df_mmsi_imo = df_mmsi_imo.rename(columns={'mmsi':'MMSI'})
    df_mmsi_imo['mmsi_scram'] = 1 
    
    # load all tracks
    dfs=[]
    for year in years :
        df_i = pd.read_csv(tracks_csv % year , index_col=False )
        df_i['AISyear'] = year
        dfs.append( df_i )
    
    df_tracks = pd.concat(dfs,axis=0,sort=True)
    
    df_tracks = df_tracks.loc[:,['OID','AISyear','IMO','MMSI','Length','Width','VesselType','TIME1','TIME2','PORT1','PORT2','INTERP_FLAG','RIGHT1','RIGHT2']]
    df_tracks['IMO'] = df_tracks.IMO.replace(-9999,np.nan) # replace IMO
    
    df_tracks['m'] = pd.to_datetime( df_tracks['TIME1']).dt.month
    df_tracks['y'] = pd.to_datetime( df_tracks['TIME1']).dt.year
    df_tracks['mmsi_scram'] = np.where(df_tracks.AISyear.isin([2010,2011,2012,2013,2014]),1,0)  # so merge only occurs for correct years
            
    df_tracks = df_tracks.merge(df_mmsi_imo[['imo_m','MMSI','m','y','mmsi_scram']],on=['MMSI','m','y','mmsi_scram'],how='left')
    # fill down (within MMSI in scrambled years)
    df_tracks = df_tracks.sort_values(by=['MMSI', 'TIME1'])
    df_tracks[['imo_m']] = df_tracks[['MMSI','imo_m','mmsi_scram']].groupby(['MMSI','mmsi_scram'],as_index=False).fillna(method='ffill')
    #df_tracks[['MMSI','imo_m']] = df_tracks[['MMSI','imo_m']].groupby('MMSI',as_index=False).fillna(method='ffill')
    #df_tracks['IMO'] = df_tracks.imo_m 
    
    df_tracks['IMO_orig'] = df_tracks.IMO 
    df_tracks['IMO'] = np.where( df_tracks.AISyear.isin([2010,2011,2012,2013,2014]) , df_tracks.imo_m , df_tracks.IMO_orig )
    
    # getting flags for whether vessel had matched imo or not
    df_tracks['has_imo'] = np.where(df_tracks.IMO.isnull(),0,1)
    df_tracks['has_imo_matched'] = np.where(df_tracks.imo_m.isnull(),0,1)
    print(df_tracks.groupby('AISyear')[['has_imo','has_imo_matched']].mean())
    
    # create time variable 
    df_tracks['TIME'] = np.where(df_tracks.PORT1<=13 , df_tracks.TIME1 , df_tracks.TIME2 )
    
    # flag entrances and exits for which we have tracks without bounds
    # need to only keep those with RIGHT flags equal zero, because this is what we keep in nobounds analysis
    df_tracks['entexit'] = (df_tracks.INTERP_FLAG==0) & ( ( (df_tracks.PORT1<=13) & (df_tracks.PORT2>=100) & (df_tracks.RIGHT1==0) ) | ( (df_tracks.PORT2<=13) & (df_tracks.PORT1>=100) & (df_tracks.RIGHT2==0) ) ) 
    
    # OID TO IMO map
    df_track_imo = df_tracks.loc[:,['AISyear','OID','imo_m']]
    
    # MMSI and date to OID and IMO (for entrances and exits)
    df_oid_mmsi_date = df_tracks.loc[df_tracks.entexit==1,['AISyear','OID','IMO','MMSI','TIME']]
    
       
    return df_track_imo , df_oid_mmsi_date
    

# could add matching step here

# load all tracks for 2010-2014
    # only need MMSI, month, year and track id
    # merge in IMO
    # save track_id, IMO in df